In [1]:
import mxnet as mx
from mxnet import gluon
import numpy as np
from tqdm import tqdm_notebook as tqdm
mx.random.seed(1)
In [2]:
# Run on the GPU when one is available, otherwise fall back to the CPU
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
In [3]:
batch_size = 64
In [4]:
def transform(data, label):
    # Convert HWC uint8 images to CHW float32 scaled to [0, 1]
    return mx.nd.transpose(data.astype(np.float32), (2, 0, 1)) / 255, label.astype(np.float32)
In [5]:
train_data = mx.gluon.data.DataLoader(dataset=mx.gluon.data.vision.MNIST(train=True, transform=transform),
                                      batch_size=batch_size,
                                      shuffle=True)
test_data = mx.gluon.data.DataLoader(dataset=mx.gluon.data.vision.MNIST(train=False, transform=transform),
                                     batch_size=batch_size,
                                     shuffle=False)
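A quick way to confirm the loaders are wired up as expected is to pull one batch and inspect it; with the transform above, each batch should come out as (64, 1, 28, 28) float32 images with a (64,) label vector. A minimal sketch:

for data, label in train_data:
    print(data.shape, data.dtype)   # (64, 1, 28, 28), float32
    print(label.shape)              # (64,)
    break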
In [6]:
from mxnet.gluon import nn

def vgg_block(num_convs, channels):
    # A VGG block: `num_convs` 3x3 convolutions (padding 1 preserves H/W),
    # followed by a 2x2 max pool with stride 2 that halves the spatial size.
    out = nn.Sequential()
    for _ in range(num_convs):
        out.add(nn.Conv2D(channels=channels,
                          kernel_size=3,
                          padding=1,
                          activation='relu'))
    out.add(nn.MaxPool2D(pool_size=2,
                         strides=2))
    return out

def vgg_stack(architecture):
    # Stack VGG blocks according to a sequence of (num_convs, channels) pairs
    out = nn.Sequential()
    for (num_convs, channels) in architecture:
        out.add(vgg_block(num_convs, channels))
    return out

num_outputs = 10
architecture = ((1, 64), (1, 128), (2, 256), (2, 512))
net = nn.Sequential()
with net.name_scope():
    net.add(vgg_stack(architecture))
    net.add(nn.Flatten())
    net.add(nn.Dense(units=512,
                     activation="relu"))
    net.add(nn.Dropout(rate=0.5))
    net.add(nn.Dense(units=512,
                     activation="relu"))
    net.add(nn.Dropout(rate=0.5))
    net.add(nn.Dense(units=num_outputs))
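Because the 3x3 convolutions use padding 1, only the pooling layer in each block shrinks the feature maps, so a 28x28 MNIST image is reduced to 14, 7, 3, and finally 1 pixel across the four blocks before the dense layers. A small sketch of that arithmetic:

size = 28
for num_convs, channels in architecture:
    size = size // 2   # MaxPool2D(pool_size=2, strides=2) floors the halved size
    print(channels, size)
# 64 14, 128 7, 256 3, 512 1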
In [7]:
print(net)
In [8]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
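A dummy forward pass is a cheap sanity check that the initialized network accepts MNIST-sized input and emits one logit per class; a minimal sketch, assuming a single all-zero image:

dummy = mx.nd.zeros(shape=(1, 1, 28, 28), ctx=ctx)
print(net(dummy).shape)   # expected: (1, 10)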
In [9]:
trainer = gluon.Trainer(params=net.collect_params(),
                        optimizer='sgd',
                        optimizer_params={'learning_rate': .05})
In [10]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
In [11]:
def evaluate_accuracy(data_iterator, net):
    # Accuracy over a full data iterator: argmax of the logits vs. the true label
    acc = mx.metric.Accuracy()
    for d, l in data_iterator:
        data = d.as_in_context(ctx)
        label = l.as_in_context(ctx)
        output = net(data)
        predictions = mx.nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]
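Called on the freshly initialized network, this should hover around chance level (roughly 0.1 for ten balanced classes), which gives a baseline to compare against after training; for example:

print(evaluate_accuracy(test_data, net))   # roughly 0.1 before training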
In [12]:
epochs = 1
smoothing_constant = .01

for e in range(epochs):
    for i, (d, l) in tqdm(enumerate(train_data), total=len(train_data)):
        data = d.as_in_context(ctx)
        label = l.as_in_context(ctx)
        with mx.autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        ##########################
        # Keep a moving average of the losses
        ##########################
        curr_loss = mx.nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)
        if i > 0 and i % 200 == 0:
            print('Batch %d. Loss: %f' % (i, moving_loss))

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))